"""
Summarize an arXiv paper in Chinese using an Ollama-hosted chat model.
"""
from langchain_core.prompts import PromptTemplate, format_document
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import ArxivLoader
from langchain_ollama import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Identifier of the arXiv paper to summarise.
ARXIV_ID = "2410.09699"

# Chat model served by the Ollama instance at this address.  The scheme is
# spelled out so the endpoint is a well-formed URL rather than a bare
# "host:port" whose interpretation is left to the client library.
ollama_client = ChatOllama(base_url="http://10.12.8.21:11434", model="qwen2.5:14b")

# Ask the loader for at most one document matching the paper id.
loader = ArxivLoader(query=ARXIV_ID, load_max_docs=1)
docs = loader.load()
if not docs:
    # Fail with a clear message instead of an IndexError on docs[0] below.
    raise RuntimeError(f"arXiv returned no document for query {ARXIV_ID!r}")
print(docs[0].metadata)

# Split the loaded paper into chunks (chunk_size=500, no overlap between
# neighbouring chunks) so that a bounded prefix can be sent to the model.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
chunks = text_splitter.split_documents(docs)

# Template applied to every chunk: it keeps only the chunk's page content.
doc_prompt = PromptTemplate.from_template("{page_content}")


def _join_chunks(parts):
    """Render each document in *parts* with ``doc_prompt`` and join the
    rendered texts with blank lines."""
    rendered = [format_document(part, doc_prompt) for part in parts]
    return "\n\n".join(rendered)


# Pipeline: list of chunks -> {"content": joined text} -> summary prompt
# -> chat model -> plain string.
# The prompt asks (in Chinese) for a Chinese summary of the content, without
# introductions of people, limited to 500 characters.
chain = (
    {"content": _join_chunks}
    | PromptTemplate.from_template(
        "使用中文总结以下内容， 不需要人物介绍，字数控制在500字以内：\n\n{content}"
    )
    | ollama_client
    | StrOutputParser()
)

# Only the first eight chunks are summarised, not the entire paper.
result = chain.invoke(chunks[:8])
print(result)
